library("tidyverse")
library("broom")
library("ggrepel")
library("here")Data analysis 1
Modeling
augmented_table <- read_tsv("../data/03_augmented_table.tsv")Rows: 2570560 Columns: 17
── Column specification ────────────────────────────────────────────────────────
Delimiter: "\t"
chr (10): GeneFeature, Sample_names, Patient, IDH_status, Localisation, Prim...
dbl (7): Quantile, CPM, GEP_score, Age, Batch, TLS_status_bin, CPM_log2
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
augmented_metadata <- read_tsv("../data/03_augmented_metadata.tsv")Rows: 64 Columns: 13
── Column specification ────────────────────────────────────────────────────────
Delimiter: "\t"
chr (9): Sample_names, Patient, IDH_status, Localisation, Primary_or_Recurre...
dbl (4): GEP_score, Age, Batch, TLS_status_bin
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#MODEL 1: CPM log2 transformed ~ TLS_status
nested_data <- augmented_table |>
group_by(GeneFeature) |>
nest() |>
mutate(model = map(data,
~lm(CPM_log2 ~ TLS_status_bin,
data = .x)),
tidy_model = map(model,
~tidy(.x, conf.int = TRUE)))gene_results <- nested_data |>
unnest(tidy_model) |>
filter(term == "TLS_status_bin") |>
select(GeneFeature,
estimate,
p.value,
conf.low,
conf.high) |>
mutate(q.value = p.adjust(p.value,
method = "fdr"),
significant = q.value < 0.05)#MODEL 2: GEP_score ~ TLS_status
gep_model <- lm(GEP_score ~ TLS_status_bin,
data = augmented_metadata)
gep_model_tidy <- tidy(gep_model,
conf.int = TRUE)#Volcano plot for all genes
volcano_plot <- gene_results |>
drop_na(estimate,
p.value,
significant,
GeneFeature) |>
mutate(neg_log10_p = -log10(p.value),
label = if_else(significant,
GeneFeature, "")) |>
ggplot(aes(x = estimate,
y = neg_log10_p,
color = significant)) +
geom_point(alpha = 0.6) +
geom_text_repel(aes(label = label),
size = 2,
max.overlaps = 20) +
geom_hline(yintercept = 0,
linetype = "dotted") +
labs(title = "Volcano Plot of Gene Effects (CPM ~ TLS_status)",
x = "Effect size (Estimate)",
y = expression(-log[10](p.value)),
color = "Significant" ) +
theme_minimal()#Print volcano plot
volcano_plotWarning: ggrepel: 3679 unlabeled data points (too many overlaps). Consider
increasing max.overlaps
ggsave(
filename = "volcano_plot.png",
path = here("results"),
dpi = 300,
bg = "white"
)Saving 7 x 5 in image
Warning: ggrepel: 3681 unlabeled data points (too many overlaps). Consider
increasing max.overlaps
#Extracting p value from model 2
tls_p <- gep_model_tidy |>
filter(term == "TLS_status_bin") |>
pull(p.value)#Plotting GEP score vs TLS status
GEP_score_vs_TLS_status <- ggplot(
augmented_metadata,
aes(x = factor(TLS_status_bin),
y = GEP_score,
fill = factor(TLS_status_bin))) +
geom_boxplot(width = 0.5,
alpha = 0.8,
color = "black") +
scale_x_discrete(labels = c("0" = "No", "1" = "Yes")) +
geom_jitter(
aes(color = factor(TLS_status_bin)),
width = 0.07,
alpha = 0.6,
size = 2
) +
scale_fill_manual(values = c("0" = "lightsteelblue2",
"1" = "indianred3")) +
scale_color_manual(values = c("0" = "lightsteelblue4",
"1" = "indianred4")) +
labs(
title = paste0(
"GEP Score by TLS Status (p value = ", signif(tls_p, 3), ")"
),
x = "TLS Status",
y = "GEP Score") +
theme_classic(base_size = 15) +
theme(legend.position = "none",
plot.title = element_text(face = "bold",
hjust = 0.5),
axis.title = element_text(face = "bold"))
GEP_score_vs_TLS_statusggsave(filename = "GEP_score_vs_TLS_status.png",
path = here("results"),
dpi = 300)Saving 7 x 5 in image
#Checking if genes found significant by TLS status overlap with the 18 signature GEP genes
signature_genes <- c(
"CCL5", "CD27", "CD274", "CD276", "CD8A", "CMKLR1", "CXCL9",
"CXCR6", "HLA-DQA1", "HLA-DRB1", "HLA-E", "IDO1", "LAG3",
"NKG7", "PDCD1LG2", "PSMB10", "STAT1", "TIGIT"
)
tls_sig_genes <- gene_results |>
filter(significant) |>
pull(GeneFeature)
overlap_genes <- intersect(signature_genes,
tls_sig_genes)
overlap_genes[1] "CCL5" "CD27" "CXCR6"
length(overlap_genes)[1] 3
length(signature_genes)[1] 18
overlap_table <- tibble(Gene = signature_genes,
In_TLS_Significant = signature_genes %in% tls_sig_genes
)
overlap_table# A tibble: 18 × 2
Gene In_TLS_Significant
<chr> <lgl>
1 CCL5 TRUE
2 CD27 TRUE
3 CD274 FALSE
4 CD276 FALSE
5 CD8A FALSE
6 CMKLR1 FALSE
7 CXCL9 FALSE
8 CXCR6 TRUE
9 HLA-DQA1 FALSE
10 HLA-DRB1 FALSE
11 HLA-E FALSE
12 IDO1 FALSE
13 LAG3 FALSE
14 NKG7 FALSE
15 PDCD1LG2 FALSE
16 PSMB10 FALSE
17 STAT1 FALSE
18 TIGIT FALSE
#Plotting overlapping genes
siginifcant_genes_overlap <- ggplot(overlap_table, aes(
x = In_TLS_Significant,
y = Gene,
color = In_TLS_Significant
)) +
geom_point(size = 4) +
scale_x_discrete(labels = c("FALSE" = "No", "TRUE" = "Yes")) +
scale_color_manual(values = c("FALSE" = "lightsteelblue4", "TRUE" = "indianred3")) +
labs(title = "Which Signature Genes Are TLS-Significant?",
x = "",
y = "Gene") +
theme_minimal(base_size = 13) +
theme(
legend.position = "none",
panel.grid.major.y = element_line(color = "grey70", linewidth = 0.5),
panel.grid.major.x = element_line(color = "grey70", linewidth = 0.5),
panel.grid.minor.y = element_line(color = "grey70", linewidth = 0.3),
panel.grid.minor.x = element_line(color = "grey70", linewidth = 0.3),
)
siginifcant_genes_overlapggsave(
filename = "significant_genes_overlap.png",
path = here("results"),
dpi = 300,
bg = "white"
)Saving 7 x 5 in image